package com.novel.crawl.bxwx9.task;

import com.novel.crawl.bxwx9.service.CrawlService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;

/**
 * Scheduled crawl jobs.
 *
 * <p>Each job delegates the actual crawling to {@link CrawlService} and then
 * resets the duplicate-request state held by the corresponding
 * {@link QueueScheduler}.
 *
 * @author 奔波儿灞
 * @since 1.0
 */
@Component
public class CrawlTask {

    private static final Logger LOG = LoggerFactory.getLogger(CrawlTask.class);

    @Autowired
    private CrawlService crawlService;

    /**
     * Scheduler whose duplicate remover is reset after every {@link #crawl()} run.
     *
     * <p>NOTE(review): two {@link QueueScheduler} fields are injected by type;
     * presumably the bean names match the field names so Spring can tell them
     * apart -- confirm against the bean configuration.
     */
    @Autowired
    private QueueScheduler scheduler;

    /**
     * Scheduler whose duplicate remover is reset after every {@link #crawlBook()} run.
     */
    @Autowired
    private QueueScheduler bookScheduler;

    /**
     * Periodic crawl: first run 5 minutes after startup, then again 5 minutes
     * after each run finishes ({@code fixedDelay}).
     */
    @Scheduled(initialDelay = 1000 * 60 * 5, fixedDelay = 1000 * 60 * 5)
    public void crawl() {
        LOG.info("begin crawl...");
        try {
            crawlService.crawl();
        } finally {
            // Reset even when the crawl fails; otherwise the stale duplicate
            // state would survive into the next scheduled run.
            resetDuplicateCheck(scheduler, "clear scheduler, total request num: {}");
        }
        LOG.info("stop crawl...");
    }

    /**
     * Book crawl: first run 1 minute after startup, then again one week after
     * each run finishes ({@code fixedDelay}).
     */
    @Scheduled(initialDelay = 1000 * 60, fixedDelay = 1000 * 60 * 60 * 24 * 7)
    public void crawlBook() {
        LOG.info("begin crawl...");
        try {
            crawlService.crawlBook();
        } finally {
            // Same reasoning as in crawl(): always reset, success or failure.
            resetDuplicateCheck(bookScheduler, "clear book scheduler, total request num: {}");
        }
        LOG.info("stop crawl...");
    }

    /**
     * Logs the total request count of the given scheduler's duplicate remover
     * and then resets its duplicate check.
     *
     * <p>NOTE(review): {@code null} is passed for the task argument of both
     * remover calls, as the original code did; this presumably relies on the
     * remover implementation ignoring the task -- confirm.
     *
     * @param target    scheduler whose duplicate remover is reset
     * @param logFormat SLF4J message with one {@code {}} placeholder for the request count
     */
    private void resetDuplicateCheck(QueueScheduler target, String logFormat) {
        DuplicateRemover remover = target.getDuplicateRemover();
        LOG.info(logFormat, remover.getTotalRequestsCount(null));
        remover.resetDuplicateCheck(null);
    }

}
